
******************** TIM AND CUSTOMS MATCH

*** DATASETS NEED TO INCLUDE THE VARIABLES IN LOCALS IN STRING FORMAT

* TIM data macros

local tim_database_d  					/*name of TIM database containing declaration information*/
local tim_database_p 					/*name of TIM database containing products per DUT*/
local tim_database_r 					/*name of TIM database containing routes per DUT*/
local tim_v_dut							/*TIM variable: dut code*/
local tim_vr_cou 						/*TIM variable: starting country route section*/
local tim_vr_cou_end 					/*TIM variable: country where transit ends*/
local tim_vr_cust 						/*TIM variable: customs*/
local tim_vr_cust_int 					/*TIM variable: internal custom were transit starts */
local tim_vd_date 						/*TIM variable: date in DD/MM/YYYY format*/
local tim_vd_name 						/*TIM variable: firm names*/
local tim_vd_buyer 						/*TIM variable: buyer names*/
local tim_vp_prod 						/*TIM variable: product codes at 8-digit HS level*/
local tim_vp_val  						/*TIM variable: transaction value in dollars*/

* Customs data macros

local cus_database   					/*name of customs database*/ 
local cus_v_idfirm 						/*customs variable: firm id*/
local cus_v_name 						/*customs variable: firm names*/
local cus_v_buyer 						/*customs variable: buyer names*/
local cus_v_cou	 						/*customs variable: importing country*/
local cus_v_prod 						/*customs variable: product names*/
local cus_v_val 						/*customs variable: export value (fob)*/
local cus_v_wei	 						/*customs variable: export net weight*/
local cus_v_cust 						/*customs variable: customs names*/
local cus_v_date 						/*customs variable: date in DD/MM/YYYY format*/
local cus_v_trans 						/*customs variable: transport mode*/

* Registry data macros

local reg_database   					/*name of registry database*/ 
local reg_v_idfirm 	 		 	        /*registry variable: firm id*/
local reg_v_mun 						/*registry variable: municipality*/

******************************************************************************

clear all
cd 

capture log close
log using matching, replace text

******* (1) Preparing TIM data

** (i) Extracting SV exiting customs
use `tim_database_r'.dta, clear
keep if `tim_vr_cou' == "El Salvador" 
keep `tim_v_dut' `tim_vr_cust' 
save tim_aux_custom.dta, replace

** (ib) Extracting route --- to be used for figure 3
use `tim_database_r'.dta, clear
keep if `tim_vr_cou' == "El Salvador" 
keep `tim_v_dut' `tim_vr_cust'  `tim_vr_cust_int' `tim_vr_cou_end'
merge 1:1 `tim_v_dut' using `tim_database_d', keep(1 3) nogen keepus( `tim_vd_date')

rename `tim_vr_cust' custom
rename `tim_vr_cust_int' custom_int
rename `tim_vr_cou_end' country_end
gen date_day = date(`tim_vd_date', "DMY")
format date_day %td
keep date_day custom custom_int country_end

save tim_aux_route.dta, replace

** (ii) Extracting products shipments registered on each DUT
use `tim_database_p'.dta, clear
keep `tim_v_dut' `tim_vp_prod' `tim_vp_val'
save tim_aux_prod.dta, replace

** (iii) TIM dataset for matching
use `tim_database_d'.dta, clear
keep `tim_v_dut' `tim_vd_date' `tim_vd_name' `tim_vd_buyer'
merge 1:1 `tim_v_dut' using tim_aux_custom.dta, keep(1 3) nogen
merge m:1 `tim_v_dut' using tim_aux_prod.dta, keep(1 3) nogen
* Formatting variables
drop if length(`tim_vp_prod')<7
replace `tim_vp_prod' = "0"+`tim_vp_prod' if length(`tim_vp_prod')==7
tostring `tim_vp_prod', replace format(%8.0g)
gen date_day = date(`tim_vd_date', "DMY")
format date_day %td
* Cleaning
drop if `tim_vp_val'<=0|`tim_vp_val'==.
drop if `tim_vr_cust'==""|`tim_vd_buyer'==""|`tim_vp_prod'==""|`tim_vr_cust'==""|`tim_vd_name'==""
foreach w in name buyer {
	replace `tim_vd_`w'' = upper(`tim_vd_`w'')
	replace `tim_vd_`w'' = trim(`tim_vd_`w'')
	replace `tim_vd_`w'' = strtrim(`tim_vd_`w'')
	replace `tim_vd_`w'' = ustrtrim(`tim_vd_`w'')
	replace `tim_vd_`w'' = stritrim(`tim_vd_`w'')
	foreach char in "." "," ";" "_" "|" "/" "\" "[" "{" "}" "]" "(" ")" "&" "%" "$" "!" "?" {
		replace `tim_vd_`w'' = subinstr(`tim_vd_`w'', " ", "", .)
	}
}
* Variables for match
rename `tim_vd_name' firm_name
rename `tim_vd_buyer' buyer_name
rename `tim_vp_prod' product
rename `tim_vr_cust' custom
rename `tim_vp_val' trans_value

save tim_base_for_matching.dta, replace
erase tim_aux_prod.dta
erase tim_aux_custom.dta

******* (2) Preparing customs data

use `cus_database'.dta, clear
keep if `cus_v_trans'=="TERRESTRE"|`cus_v_trans'=="MULTIMODAL" // Select land or multi-model transport mode [CHECK IF NAMES DIFFER]
keep `cus_v_name' `cus_v_idfirm' `cus_v_date' `cus_v_buyer' `cus_v_prod' `cus_v_cou' `cus_v_val' `cus_v_wei' `cus_v_cust'
replace `cus_v_prod' = "0"+`cus_v_prod' if length(`cus_v_prod')==7
tostring `cus_v_prod', replace format(%8.0g)
gen date_day = date(`cus_v_date', "DMY")
format date_day %td
* Adding municipality data
rename `cus_v_idfirm' `reg_v_idfirm'
merge m:1 `reg_v_idfirm' using `reg_database', gen(_m) keep(1 3) keepus(`reg_v_mun') 
tab _m
drop _m
rename `reg_v_idfirm' `cus_v_idfirm'
* Cleaning
drop if `cus_v_val'<=0|`cus_v_val'==.|`cus_v_wei'<=0|`cus_v_wei'==.
drop if `cus_v_cust'==""|`cus_v_buyer'==""|`cus_v_prod'==""|`cus_v_cust'==""|`cus_v_name'==""|`reg_v_mun'==""
foreach w in name buyer {
	replace `cus_v_`w'' = upper(`cus_v_`w'')
	replace `cus_v_`w'' = trim(`cus_v_`w'')
	replace `cus_v_`w'' = strtrim(`cus_v_`w'')
	replace `cus_v_`w'' = ustrtrim(`cus_v_`w'')
	replace `cus_v_`w'' = stritrim(`cus_v_`w'')
	foreach char in "," ";" "_" "|" "/" "\" "[" "{" "}" "]" "(" ")" "&" "%" "$" "!" "?" {
		replace `cus_v_`w'' = subinstr(`cus_v_`w'', " ", "", .)
	}
}
* Variables for match
rename `reg_v_mun' munic
rename `cus_v_idfirm' id_firm
rename `cus_v_name' firm_name
rename `cus_v_buyer' buyer_name
rename `cus_v_prod' product
rename `cus_v_cust' custom
rename `cus_v_cou' importer
rename `cus_v_wei' wei_value
rename `cus_v_val' exp_value

save custom_base_for_matching.dta, replace

******* (3) Matching

use custom_base_for_matching.dta, clear
replace date_day = date_day - 4
forvalues y = 1/7 { /*search over 3-day windows*/
	replace date_day = date_day + 1
	merge 1:1 buyer_name buyer_name product custom date using tim_base_for_matching.dta, gen(_c`y') keep(1 3)
	gen tim_use`y' = _c`y'==3
	rename trans_value trans_value_`y'
}
egen tim_use = rowmax(tim_use*)
gen sem_year = hofd(date_day)
format sem_year %th 
gen year = year(dofh(sem_year))
gen semester = halfyear(dofh(sem_year))
egen id_buyer = group(buyer)

keep id_firm id_buyer product custom importer date sem_year year semester exp_value wei_value munic tim_use

save customs_tim_trans_data.dta, replace //raw matched data

log close